package ec.edu.ups.cidi.siia.plamdep.comunicacionweb.extractorlinks;
import java.util.ArrayList;

import ec.edu.ups.cidi.siia.plamdep.comunicacionweb.Utils.HtmlUtils;
/**
 * Link extractor for result pages retrieved through webproxy.net.
 *
 * <p>Each call to {@link #setContenidoHTML(String)} scans the given HTML and
 * appends every link it finds to an internal list; the list is never cleared,
 * so links accumulate across calls. {@link #getLinks()} returns that list.
 */
public class LinksWebProxy implements IExtractorLinks
{
    /** URL-encoded prefix (webproxy.net wrapping a google.com.ec redirect) stripped from every href. */
    private static final String PREFIJO_PROXY="http://webproxy.net/view?q=http%3A%2F%2Fwww.google.com.ec%2Furl%3Fq%3D";

    private final ArrayList<String> resultados=new ArrayList<String>(); // list of links found so far
    private final HtmlUtils utils=new HtmlUtils();

    /**
     * Scans {@code html} for {@code <p ...>...</p>} fragments, takes the first
     * {@code href} of each fragment, cleans it and stores it in the result list.
     * Links that still point at webproxy after cleaning are discarded.
     *
     * <p>NOTE(review): despite the method name, the prefix stripped here is a
     * Google redirect wrapped by webproxy.net, not a Bing URL.
     *
     * @param html page source to scan; {@code null} is ignored and adds no links
     */
    public void sacaLinksBing(String html)
    {
        if(html==null)
        {
            return;
        }
        // Flatten the document to one line before cutting it into fragments.
        String plano=html.replace("\n"," ");
        ArrayList<String> parrafos=this.utils.obtenerEtiqueta2(plano,"<p","/p>", true, 0);
        for(String parrafo:parrafos)
        {
            ArrayList<String> hrefs=this.utils.obtenerEtiqueta2(parrafo,"href=\"","\">", false, 0);
            if(hrefs.isEmpty())
            {
                continue;
            }
            String link=this.limpiarLink(hrefs.get(0));
            if(link!=null && !link.startsWith("http://webproxy"))
            {
                resultados.add(link);
            }
        }
    }

    /**
     * Turns one raw href fragment into the target URL.
     *
     * @param hrefCrudo href text as returned by the HTML helper
     * @return the cleaned link, or {@code null} when no "http..." section could be extracted
     */
    private String limpiarLink(String hrefCrudo)
    {
        // Drop the attribute delimiters in case the helper left them in place.
        String link=hrefCrudo.replace("href=\"","").replace("\">","");
        link=link.replace(PREFIJO_PROXY, "");
        // Undo the URL-encoding sequences that appear in the proxied address.
        link=link.replace("%3A%2F%2F", "://");
        link=link.replace("%2F", "/");
        link=link.replace("%2525C3%2525B3", "ó");
        link=link.replace("%2525", "%");

        ArrayList<String> candidatos=this.utils.obtenerEtiqueta2(link, "http", "%26amp|%26amp%", false, 0);
        if(candidatos.isEmpty())
        {
            // Nothing matched (e.g. an href without "http"): skip it instead of
            // failing with IndexOutOfBoundsException on get(0).
            return null;
        }
        link=candidatos.get(0);
        // The longer token must be removed before its "%26amp" prefix, otherwise
        // it can never match and "%3Bsa%3DU" is left behind in the link.
        link=link.replace("%26amp%3Bsa%3DU", "");
        link=link.replace("%26amp", "");
        link=link.replaceAll("&q[0-9]=", "");
        return link;
    }

    /**
     * @return the internal list with every link collected so far (not a copy)
     */
    public ArrayList<String> getLinks() {
        return this.resultados;
    }

    /**
     * Feeds a page to the extractor; found links are appended to the result list.
     *
     * @param html page source to scan
     */
    public void setContenidoHTML(String html) {
        this.sacaLinksBing(html);
    }
}